Discovering geographical distribution of Grab hailing services in Singapore

Author

Jin Yuan

Published

22/01/2024

Modified

25/01/2024

Background

In this exploration, we investigate the geographical and spatio-temporal distribution of Grab hailing services in Singapore, leveraging the rich dataset provided by GRAB known as Grab Posisi. As a significant shared taxi operator in Southeast Asia, GRAB’s dataset offers a unique perspective on human mobility. This study focuses on applying Spatial Point Pattern Analyses (KDE/NKDE) to discern patterns within the dataset.

Install Packages & Importing Data

Install Necessary Packages

For this exercise, we will be using the following packages:

sf for importing, managing, and processing geospatial data through simple features.

tidyverse for comprehensive data science tasks, including importing, wrangling, and visualizing spatial data.

spatstat for point pattern analysis, offering a wide range of functions for exploring spatial patterns.

spNetwork for analyzing spatial networks and their properties.

classInt for determining class intervals for mapping and visualization purposes.

viridis for providing color maps suitable for creating visually appealing visualizations.

maptools for manipulating geographic data, offering a set of tools for various spatial operations.

raster for reading, writing, manipulating, and analyzing gridded spatial data in a raster format.

pacman::p_load(tmap, sf, tidyverse, spatstat, spNetwork, classInt, viridis,
               maptools, raster)

Importing Data

GrabPosisi Data (Pick Up Location)

#GrabPosisi Data
origin_df <- read_rds("/dljyuan/IS415-GAA/data/rds/origin_df.rds")
#destination_df <- read_rds("/dljyuan/IS415-GAA/data/rds/destination_df.rds")

Boundary Data (Coastal Outline)

mpsz_sf <- st_read(dsn = "../data/geospatial/",
                   layer="MP14_SUBZONE_WEB_PL")
Reading layer `MP14_SUBZONE_WEB_PL' from data source 
  `C:\dljyuan\IS415-GAA\data\geospatial' using driver `ESRI Shapefile'
Simple feature collection with 323 features and 15 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
sg_sf <- mpsz_sf%>%
  st_union()
st_geometry(sg_sf)
Geometry set for 1 feature 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 2667.538 ymin: 15748.72 xmax: 56396.44 ymax: 50256.33
Projected CRS: SVY21
MULTIPOLYGON (((17763.39 15889.1, 17758.6 15868...
sg_sf <- st_make_valid(sg_sf)
length(which(st_is_valid(sg_sf) == FALSE))
[1] 0

Road Layer Data (Singapore) The following include data from Singapore, Malaysia & Brunei

road <- st_read(dsn="../data/geospatial",
                   layer="gis_osm_roads_free_1")

Convert CRS of Road Data to be the same as the Coastal Outline (SVY21)

road <- st_transform(road, crs = st_crs(sg_sf))
st_geometry(sg_sf)
st_geometry(road)

Filter out unnecessary variables

#road <- road[, c("name", "geometry")]
origin_df <- origin_df[, c("trj_id", "rawlat", "rawlng", "accuracy", "weekday", "start_hr", "day")]

Extracting road data from Singapore only

singapore_roads <- st_intersection(road, sg_sf)
Write Data for Future Extraction
write_rds(singapore_roads, "/dljyuan/IS415-GAA/data/rds/singapore_roads.rds")

Import SG Road Layers

singapore_roads <- read_rds("/dljyuan/IS415-GAA/data/rds/singapore_roads.rds")

Data Wrangling

Convert Data CRS to 3414 SVY21

sg_sf <- st_set_crs(sg_sf, 3414)
Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
that
singapore_roads <- st_set_crs(singapore_roads, 3414)
Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
that
st_crs(sg_sf)
Coordinate Reference System:
  User input: EPSG:3414 
  wkt:
PROJCRS["SVY21 / Singapore TM",
    BASEGEOGCRS["SVY21",
        DATUM["SVY21",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4757]],
    CONVERSION["Singapore Transverse Mercator",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Cadastre, engineering survey, topographic mapping."],
        AREA["Singapore - onshore and offshore."],
        BBOX[1.13,103.59,1.47,104.07]],
    ID["EPSG",3414]]
st_crs(singapore_roads)
Coordinate Reference System:
  User input: EPSG:3414 
  wkt:
PROJCRS["SVY21 / Singapore TM",
    BASEGEOGCRS["SVY21",
        DATUM["SVY21",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4757]],
    CONVERSION["Singapore Transverse Mercator",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",1.36666666666667,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",103.833333333333,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",1,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",28001.642,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",38744.572,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["northing (N)",north,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["easting (E)",east,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Cadastre, engineering survey, topographic mapping."],
        AREA["Singapore - onshore and offshore."],
        BBOX[1.13,103.59,1.47,104.07]],
    ID["EPSG",3414]]
tmap_mode('plot')
tm_shape(singapore_roads) +
  tm_lines()
tmap_mode('plot')

Convert Latitude & Longtitude to Cartisians

origin_df <- st_as_sf(origin_df, 
                       coords = c("rawlng", "rawlat"),
                       crs=4326) %>%
  st_transform(crs = 3414)
st_geometry(origin_df)
tmap_mode('plot')
tm_shape(sg_sf) +
  tm_polygons() +
  tm_shape(origin_df) +
  tm_dots()

Convert to Spatial Class

origin <- as_Spatial(origin_df)
sg <- as_Spatial(sg_sf)
#singapore_roads <- as_Spatial(singapore_roads)

Convert to Spatial Object

origin_sp <- as(origin, "SpatialPoints")
sg_sp <- as(sg, "SpatialPolygons")

Convert to ppp Object

origin_ppp <- as(origin_sp, "ppp")

Check for duplicate or overlap points

tm_shape(sg_sf) +
  tm_polygons() +
  tm_shape(origin) +
  tm_dots(alpha=0.4, 
          size=0.02)

any(duplicated(origin_ppp))
[1] FALSE
sum(multiplicity(origin_ppp) > 1)
[1] 0

Convert to owin Object

sg_owin <- as(sg_sp, "owin")

Combine Grabposisi Data with Coastal Outline

origin_ppp = origin_ppp[sg_owin]
plot(origin_ppp)

Exploratory Data Analysis

Kernel Density Estimation (KDE)

# Convert data to km as our unit of measurement
origin_ppp.km <- rescale(origin_ppp, 1000, "km")

Selecting Kernel Method

Baddeley et. (2016) suggested the use of the bw.ppl() algorithm because in ther experience it tends to produce the more appropriate values when the pattern consists predominantly of tight clusters. But they also insist that if the purpose of once study is to detect a single tight cluster in the midst of random noise then the bw.diggle() method seems to work best.

par(mfrow=c(1,2))
plot(density(origin_ppp.km, 
  sigma=bw.diggle, 
  edge=TRUE,
  kernel="gaussian"),
  main = "bw.diggle")

plot(density(origin_ppp.km, 
  sigma=bw.ppl, 
  edge=TRUE,
  kernel="gaussian"),
  main = "bw.ppl")

par(mfrow=c(2,2))
plot(density(origin_ppp.km, 
             sigma=bw.ppl, 
             edge=TRUE, 
             kernel="gaussian"), 
     main="Gaussian")
plot(density(origin_ppp.km, 
             sigma=bw.ppl, 
             edge=TRUE, 
             kernel="epanechnikov"), 
     main="Epanechnikov")
plot(density(origin_ppp.km, 
             sigma=bw.ppl, 
             edge=TRUE, 
             kernel="quartic"), 
     main="Quartic")
plot(density(origin_ppp.km, 
             sigma=bw.ppl, 
             edge=TRUE, 
             kernel="disc"), 
     main="Disc")

There are no much differences among the 4 kernel methods. Sticking to gaussian for this analysis.

kde_origin.ppl <- density(origin_ppp.km, 
                               sigma=bw.ppl, 
                               edge=TRUE,
                               kernel="gaussian")

Comparing Fixed & Adaptive KDE

kde_origin_adaptive <- adaptive.density(origin_ppp.km, method="kernel")
par(mfrow=c(1,2))
plot(kde_origin.ppl, main = "Fixed bandwidth")
plot(kde_origin_adaptive, main = "Adaptive bandwidth")

Converting KDE into grid object

gridded_kde_origin_ppl <- as.SpatialGridDataFrame.im(kde_origin.ppl)
kde_origin_ppl_raster <- raster(gridded_kde_origin_ppl)
projection(kde_origin_ppl_raster) <- CRS("+init=EPSG:3414")

Visualising Output Map

tm_shape(kde_origin_ppl_raster) + 
  tm_raster("v", palette = "YlGnBu", title="") +
  tm_layout(
    legend.position = c("right", "bottom"), 
    main.title = "Pick Up Points Density",
    frame = FALSE)

Network Kernel Density Estimation (NKDE)

**Examine the structure of the output SpatialDataFrame

plot(singapore_roads)
plot(origin_df,add=T,col='#E26B51',pch = 19)
tm <- mpsz_sf %>%
  filter(PLN_AREA_N == "TAMPINES")
tm <- tm%>%
  st_union()
tm <- st_make_valid(tm)
length(which(st_is_valid(tm) == FALSE))
[1] 0
town <- mpsz_sf %>%
  filter(CA_IND == "Y")
town <- town%>%
  st_union()
town <- st_make_valid(town)
length(which(st_is_valid(town) == FALSE))
[1] 0
wood <- mpsz_sf %>%
  filter(PLN_AREA_N == "WOODLANDS")
wood <- wood%>%
  st_union()
wood <- st_make_valid(wood)
length(which(st_is_valid(wood) == FALSE))
[1] 0
cck_bp <- mpsz_sf %>%
  filter(PLN_AREA_N %in% c("CHOA CHU KANG","BUKIT PANJANG"))

cck_bp <- cck_bp%>%
  st_union()

cck_bp <- st_make_valid(cck_bp)
length(which(st_is_valid(cck_bp) == FALSE))
[1] 0
jg <- mpsz_sf %>%
  filter(PLN_AREA_N %in% c("JURONG WEST","JURONG EAST","CLEMENTI"))
jg <- jg%>%
  st_union()
jg <- st_make_valid(jg)
length(which(st_is_valid(jg) == FALSE))
[1] 0
tm <- st_transform(tm, crs = 3414)
town <- st_transform(town, crs = 3414)
wood <- st_transform(wood, crs = 3414)
cck_bp <- st_transform(cck_bp, crs = 3414)
jg <- st_transform(jg, crs = 3414)
tm_roads <- st_intersection(singapore_roads, tm)
town_roads <- st_intersection(singapore_roads, town)
wood_roads <- st_intersection(singapore_roads, wood)
cck_bp_roads <- st_intersection(singapore_roads, cck_bp)
jg_roads <- st_intersection(singapore_roads, jg)
par(mfrow=c(2,3))
plot(tm, main = "Tampines Area")
plot(town, main = "Town Area")
plot(wood, main = "Woodlands Area")
plot(cck_bp, main = "CCK  Area")
plot(jg, main = "Jurong Area")

plot(tm_roads)
plot(town_roads)
plot(wood_roads)
plot(cck_bp_roads)
plot(jg_roads)
tmap_mode('plot')

tmap_arrange(tm_shape(tm_roads) +
               tm_lines(col = "red") +
               tm_layout(title = "Tampines", title.size = 0.8),
             tm_shape(town_roads) +
               tm_lines(col = "blue") +
               tm_layout(title = "Town", title.size = 0.8), 
             tm_shape(wood_roads) +
               tm_lines(col = "green") +
               tm_layout(title = "Woodlands", title.size = 0.8),
             tm_shape(cck_bp_roads) +
               tm_lines(col = "orange") +
               tm_layout(title = "Choa Chu Kang", title.size = 0.8),
             tm_shape(jg_roads) +
               tm_lines(col = "purple") +
               tm_layout(title = "Jurong", title.size = 0.8), 
             asp=2, ncol=3)

tm_roads <- st_cast(tm_roads, "LINESTRING")
town_roads <- st_cast(town_roads, "LINESTRING")
wood_roads <- st_cast(wood_roads, "LINESTRING")
cck_bp_roads <- st_cast(cck_bp_roads, "LINESTRING")
jg_roads <- st_cast(jg_roads, "LINESTRING")
lixels_tm <- lixelize_lines(tm_roads, 
                         700, 
                         mindist = 350)
lixels_town <- lixelize_lines(town_roads, 
                         700, 
                         mindist = 350)
lixels_wood <- lixelize_lines(wood_roads, 
                         700, 
                         mindist = 350)
lixels_cck_bp <- lixelize_lines(cck_bp_roads, 
                         700, 
                         mindist = 350)
lixels_jg <- lixelize_lines(jg_roads, 
                         700, 
                         mindist = 350)
samples_tm <- lines_center(lixels_tm)
samples_town <- lines_center(lixels_town)
samples_wood <- lines_center(lixels_wood)
samples_cck_bp <- lines_center(lixels_cck_bp)
samples_jg <- lines_center(lixels_jg)
origin_tm = st_intersection(origin_df, tm)
origin_town = st_intersection(origin_df, town)
origin_wood = st_intersection(origin_df, wood)
origin_cck_bp = st_intersection(origin_df, cck_bp)
origin_jg = st_intersection(origin_df, jg)
tmap_mode('view')
tmap mode set to interactive viewing
tm_basemap("OpenStreetMap") +
tm_shape(wood_roads) +
  tm_lines(col = "green") +
  tm_shape(origin_wood) + 
  tm_dots()
tmap_mode('plot')
tmap mode set to plotting
densities_town <- nkde(town_roads, 
                  events = origin_town,
                  w = rep(1,nrow(origin_town)),
                  samples = samples_town,
                  kernel_name = "quartic",
                  bw = 300, 
                  div= "bw", 
                  method = "simple", 
                  digits = 1, 
                  tol = 1,
                  grid_shape = c(1,1), 
                  max_depth = 8,
                  agg = 5, #we aggregate events within a 5m radius (faster calculation)
                  sparse = TRUE,
                  verbose = FALSE)
samples_town$density <- densities_town*1000
lixels_town$density <- densities_town*1000
  • Tampines
  • Town
  • Wood
  • Choa Chu Kang
  • Jurong
tmap_mode('plot')
tm_shape(lixels_tm)+
  tm_lines(col="density")+
tm_shape(origin_tm)+
  tm_dots()

tmap_mode('plot')
tm_shape(lixels_town)+
  tm_lines(col="density")+
tm_shape(origin_town)+
  tm_dots()

tmap_mode('plot')
tm_shape(lixels_wood)+
  tm_lines(col="density")+
tm_shape(origin_wood)+
  tm_dots()

tmap_mode('plot')
tm_shape(lixels_cck_bp)+
  tm_lines(col="density")+
tm_shape(origin_cck_bp)+
  tm_dots()

tmap_mode('plot')
tm_shape(lixels_jg)+
  tm_lines(col="density")+
tm_shape(origin_jg)+
  tm_dots()